# Show the project screenshot (renders inline only in a notebook/IPython front end).
# Image is imported here because this statement runs before the file's main
# import block, where the name would otherwise not yet be defined.
from IPython.display import Image
Image('californiascreenshot.png')
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from statsmodels.formula.api import ols
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.decomposition import PCA
import xgboost as xgb
from IPython.display import Image
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import reverse_geocoder as rg
import folium
# Load the housing CSV from its hard-coded local path.
dt = pd.read_csv(
    'C:\\Users\\Subhasish Das\\Desktop\\SimplyLearn\\Project\\Kaggle\\California_Housing_Price_Prediction\\housing.csv'
)

# Structural overview. These bare expressions only produce visible output
# when evaluated as the last statement of a notebook cell.
dt.head()
dt.shape
dt.dtypes
dt.describe()

# Histograms of the DataFrame's columns.
dt.hist(bins=50, figsize=(20, 15))
plt.show()

# Per-column count of missing values, then a closer look at total_bedrooms.
dt.isnull().sum()
dt['total_bedrooms'].unique()
ax = dt['total_bedrooms'].hist(bins=40)

# Candidate fill values for total_bedrooms.
mean_bedrooms = dt['total_bedrooms'].mean()
mean_bedrooms
median_bedrooms = dt['total_bedrooms'].median()
median_bedrooms
# Impute missing total_bedrooms values with that column's median.
# The fill is restricted to total_bedrooms: a frame-wide fillna would write
# the bedroom median into a NaN in *any* column, and the axis argument has
# no purpose when filling with a scalar.
dt['total_bedrooms'] = dt['total_bedrooms'].fillna(median_bedrooms)

# Re-check the per-column missing-value counts after imputation.
dt.isnull().sum()
# Pairwise scatter/distribution plots of the dataset.
sns.pairplot(dt)

# Correlation heatmap. Correlations are computed on the numeric columns only:
# calling corr() on a frame that still holds string columns raises in
# pandas >= 2.0, and select_dtypes works across pandas versions.
plt.figure(figsize=(10, 10))
sns.heatmap(dt.select_dtypes(include='number').corr(), annot=True, cmap='coolwarm')
def reverseGeocode(coordinates):
    """Look up *coordinates* with ``rg.search`` and return its result unchanged.

    ``coordinates`` is passed straight through to ``rg.search``; the caller in
    this file supplies a list of (latitude, longitude) tuples.
    """
    return rg.search(coordinates)
if __name__ == "__main__":
    # One (lat, long) tuple per DataFrame row; the lookup accepts many pairs at once.
    coordinates = list(zip(dt['latitude'], dt['longitude']))
    data = reverseGeocode(coordinates)

    # Copy selected fields of each lookup result into new columns,
    # keeping the result order aligned with the DataFrame rows.
    dt['name'] = [place['name'] for place in data]
    dt['admin1'] = [place['admin1'] for place in data]
    dt['admin2'] = [place['admin2'] for place in data]
dt.head()

# Give the geocoded columns readable names. The county-level column in this
# file is 'admin2'; the previous key 'admin' matched no column, and rename()
# silently ignores unknown keys, so 'County' was never created.
dt.rename(columns={'name': 'City', 'admin1': 'State', 'admin2': 'County'}, inplace=True)
# display() is only implicitly available inside IPython/Jupyter; import it
# explicitly so this section also works when the file is run as a script.
from IPython.display import display

# Centre point and zoom level of the initial map view.
latitude = 37.88
longitude = -122.23
traffic_map = folium.Map(location=[latitude, longitude], zoom_start=5)
colordict = {0: 'lightblue', 1: 'lightgreen', 2: 'orange', 3: 'red'}  # not referenced in this section

# Add one circle marker per row with a popup showing city and median house value.
# The loop variables use their own names so they do not overwrite the
# map-centre latitude/longitude defined above.
# NOTE(review): str.capitalize() lower-cases everything after the first
# character ("Los Angeles" -> "Los angeles") - confirm that is intended.
for row_lat, row_lon, city, house_value in zip(
        dt['latitude'], dt['longitude'], dt['City'], dt['median_house_value']):
    folium.CircleMarker(
        [row_lat, row_lon],
        popup=('City: ' + str(city).capitalize() + '<br>'
               'median_house_value: ' + str(house_value)),
        # 'b' is a matplotlib shorthand, not a CSS colour name; the map is
        # rendered by the browser, so use the CSS name.
        color='blue',
        fill=True,
        fill_opacity=0.7,
    ).add_to(traffic_map)

display(traffic_map)